!! Copyright (C) 2009,2010,2011,2012  Marco Restelli
!!
!! This file is part of:
!!   LDGH -- Local Hybridizable Discontinuous Galerkin toolkit
!!
!! LDGH is free software: you can redistribute it and/or modify it
!! under the terms of the GNU General Public License as published by
!! the Free Software Foundation, either version 3 of the License, or
!! (at your option) any later version.
!!
!! LDGH is distributed in the hope that it will be useful, but WITHOUT
!! ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
!! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
!! License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with LDGH. If not, see <http://www.gnu.org/licenses/>.
!!
!! author: Marco Restelli                   <marco.restelli@gmail.com>


!> \file
!! Main program for the LDG-H method.
!!
!! \n
!!
!! This program provides a unified implementation of the following
!! three hybrid methods for the steady diffusion-advection-reaction
!! equation:
!! <ul>
!!  <li> Raviart-Thomas method \f$\mathbb{RT}_k\f$
!!  <li> Local Discontinuous Galerkin Hybridizable method
!!  \f$\mathbb{LDG-H}_k\f$
!!  <li> Brezzi-Douglas-Marini method \f$\mathbb{BDM}_k\f$.
!! </ul>
!! The three methods are implemented with hybridization and static
!! condensation; see \c mod_ldgh_locmat for the details of the
!! variational forms.
!!
!! \note The current MPI parallelism is very limited: it is used
!! only in the solution of the linear system when the MUMPS
!! solver is selected. As a consequence, most of the code is executed
!! by the master thread, with the remaining ones doing only part
!! of the initializations and the calls to the MUMPS linear
!! solver.
!<
program ldgh_main

 use mod_utils, only: &
   t_realtime, my_second

 use mod_messages, only: &
   mod_messages_constructor, &
   mod_messages_destructor,  &
   mod_messages_initialized, &
   error,   &
   warning, &
   info

 use mod_kinds, only: &
   mod_kinds_constructor, &
   mod_kinds_destructor,  &
   mod_kinds_initialized, &
   wp

 use mod_fu_manager, only: &
   mod_fu_manager_constructor, &
   mod_fu_manager_destructor,  &
   mod_fu_manager_initialized, &
   new_file_unit

 !$ use mod_omp_utils, only: &
 !$   mod_omp_utils_constructor, &
 !$   mod_omp_utils_destructor,  &
 !$   mod_omp_utils_initialized, &
 !$   detailed_timing_omp, &
 !$   omput_push_key,      &
 !$   omput_pop_key,       &
 !$   omput_start_timer,   &
 !$   omput_close_timer,   &
 !$   omput_write_time

 use mod_octave_io, only: &
   mod_octave_io_constructor, &
   mod_octave_io_destructor,  &
   mod_octave_io_initialized, &
   write_octave, &
   read_octave

 use mod_mpi_utils, only: &
   mod_mpi_utils_constructor, &
   mod_mpi_utils_destructor,  &
   mod_mpi_utils_initialized, &
   mpi_comm_world, &
   mpi_init, mpi_finalize, &
   mpi_comm_rank

 use mod_sympoly, only: &
   mod_sympoly_constructor, &
   mod_sympoly_destructor,  &
   mod_sympoly_initialized, &
   show

 use mod_octave_io_sympoly, only: &
   mod_octave_io_sympoly_constructor, &
   mod_octave_io_sympoly_destructor,  &
   mod_octave_io_sympoly_initialized, &
   write_octave

 use mod_linal, only: &
   mod_linal_constructor, &
   mod_linal_destructor,  &
   mod_linal_initialized, &
   invmat_nop, &
   invmat_chol

 use mod_perms, only: &
   mod_perms_constructor, &
   mod_perms_destructor,  &
   mod_perms_initialized

 use mod_octave_io_perms, only: &
   mod_octave_io_perms_constructor, &
   mod_octave_io_perms_destructor,  &
   mod_octave_io_perms_initialized

 use mod_sparse, only: &
   mod_sparse_constructor, &
   mod_sparse_destructor,  &
   mod_sparse_initialized, &
   ! sparse types
   t_col,       &
   t_tri,       &
   ! construction of new objects
   new_col,     &
   new_tri,     &
   ! convertions
   col2tri,     &
   tri2col,     &
   ! overloaded operators
   operator(+), &
   operator(*), &
   sum,         &
   transpose,   &
   matmul,      &
   clear

 use mod_octave_io_sparse, only: &
   mod_octave_io_sparse_constructor, &
   mod_octave_io_sparse_destructor,  &
   mod_octave_io_sparse_initialized, &
   write_octave

 use mod_linsolver, only: &
   mod_linsolver_constructor, &
   mod_linsolver_destructor,  &
   !1) Parameters which should be set before using the solvers:
   t_nsolver_general_parms, linsolver_general_parms, &
   t_nsolver_umfpack_parms, linsolver_umfpack_parms, &
   t_nsolver_mumps_parms,   linsolver_mumps_parms,   &
   t_nsolver_itersol_parms, linsolver_itersol_parms, &
   !2) Main module functions
   factor, solve, clean

 use mod_output_control, only: &
   mod_output_control_constructor, &
   mod_output_control_destructor,  &
   mod_output_control_initialized, &
   elapsed_format, &
   base_name

 use mod_master_el, only: &
   mod_master_el_constructor, &
   mod_master_el_destructor,  &
   mod_master_el_initialized

 use mod_grid, only: &
   mod_grid_constructor, &
   mod_grid_destructor,  &
   mod_grid_initialized, &
   t_grid,               &
   new_grid, clear,      &
   affmap,               &
   write_octave
 
 use mod_numquad, only: &
   mod_numquad_constructor, &
   mod_numquad_destructor,  &
   mod_numquad_initialized

 use mod_base, only: &
   mod_base_constructor, &
   mod_base_destructor,  &
   mod_base_initialized, &
   t_base, clear, &
   write_octave

 use mod_bcs, only: &
   mod_bcs_constructor, &
   mod_bcs_destructor,  &
   mod_bcs_initialized, &
   b_dir,                 &
   t_bcs, t_b_s, p_t_b_s, &
   new_bcs, clear

 use mod_fe_spaces, only: &
   mod_fe_spaces_constructor, &
   mod_fe_spaces_destructor,  &
   mod_fe_spaces_initialized, &
   rt,      & ! complete f.e. space RT
   ldgh,    & ! complete f.e. space LDG-H
   bdm        ! complete f.e. space BDM

 use mod_testcases, only: &
   mod_testcases_constructor, &
   mod_testcases_destructor,  &
   mod_testcases_initialized, &
   test_name,   &
   test_description,&
   coeff_dir,   &
   coeff_xiadv, &
   coeff_lam,   &
   coeff_q, coeff_divq

 use mod_tau, only: &
   mod_tau_constructor, &
   mod_tau_destructor,  &
   mod_tau_initialized, &
   taus

 use mod_ldgh_locmat, only: &
   mod_ldgh_locmat_constructor, &
   mod_ldgh_locmat_destructor,  &
   mod_ldgh_locmat_initialized, &
   ldgh_locmat, t_bcs_error

 use mod_ldgh_reconstructions, only: &
   mod_ldgh_reconstructions_constructor, &
   mod_ldgh_reconstructions_destructor,  &
   mod_ldgh_reconstructions_initialized, &
   uuu, qqq, qhn, &
   bases, uuus, uuusef, xiadv_el, &
!   base_qsdg, qqqsdg,   &
   base_qsrt, qqqsrt

 use mod_error_norms, only: &
   mod_error_norms_constructor, &
   mod_error_norms_destructor,  &
   mod_error_norms_initialized, &
   hybrid_err, &
   primal_err, &
   primal_err_sef, &
   dual_err

!-----------------------------------------------------------------------
 
 implicit none

!-----------------------------------------------------------------------
 
 ! Parameters
 integer, parameter :: max_char_len = 1000
 character(len=*), parameter :: input_file_name = 'ldgh.in'
 character(len=*), parameter :: this_prog_name = 'ldgh'

 ! Main variables
 logical :: write_grid, write_sys, compute_error_norms, compute_usef
 integer :: k, stab_tauk, stab_bartauk, nbound, err_extra_deg
 integer, allocatable :: bc_type_t(:,:)
 character(len=max_char_len) :: method, grid_file, in_testname, &
   stab_method, in_basename, cbc_type
 type(t_base) :: base
 type(t_grid), target :: grid
 type(t_bcs), target :: bcs

 ! Local matrixes
 integer :: dp, mk, pk, nk
 integer :: pos_shift_ie, pos_shift_isl, pos_shift_igdll, &
   pos_shift_jsl, pos
 integer :: ie, isl, igdll, is, i, il, jsl, jgdll, js, j, jl
 real(wp), allocatable :: &
   aak(:,:) , bbk(:,:) , cck(:,:) ,         &
   ddk(:,:) , eek(:,:) , hhk(:,:) , f2k(:), &
   ffk(:,:) , ggk(:,:) , llk(:,:) , f3k(:), &
  aaki(:,:) , wwk(:,:) , qqk(:,:) ,         &
  ddkaaki(:,:) , vvk(:,:) , uuk(:,:) ,      &
  mmmk(:,:) , fffk(:)
 
 ! Global matrix
 integer :: sdim, nnz_mmm11
 integer, allocatable :: mmmi(:), mmmj(:)
 real(wp), allocatable :: mmmx(:), fff(:), fff1(:), fff2(:), &
  lam(:), lam1(:), rhs(:)
 type(t_col) :: mmm, mmm11, mmm12, mmm21, mmm22

 ! bcs variables
 integer :: is_dir, is_int
 integer, allocatable :: dofs_dir(:), dofs_nat(:)
 real(wp), allocatable :: lam2(:), norm_e2(:)

 ! Linear system solver
 logical :: itsol_abstol
 integer :: mpi_id, umfpack_print_level, precon_ndiags
 real(wp) :: itsol_toll
 character(len=max_char_len) :: linsolver, itsolver, &
   preconditioner

 ! error computations
 real(wp) :: e_l2s, e_l2k, e_pl2s, e_pl2k, e_u_l2, e_us_l2, e_usef_l2, &
   e_q_l2, e_q_hdiv, e_qsdg_l2, e_qsdg_hdiv, e_qsrt_l2, e_qsrt_hdiv

 ! IO variables
 character(len=*), parameter :: out_file_nml_suff = '-nml.octxt'
 character(len=10000+len(out_file_nml_suff)) :: out_file_nml_name
 character(len=*), parameter :: out_file_grid_suff = '-grid.octxt'
 character(len=10000+len(out_file_grid_suff)) :: out_file_grid_name
 character(len=*), parameter :: out_file_base_suff = '-base.octxt'
 character(len=10000+len(out_file_base_suff)) :: out_file_base_name
 character(len=*), parameter :: out_file_results_suff = '-results.octxt'
 character(len=10000+len(out_file_results_suff)) :: out_file_results_name

 ! Auxiliary variables
 integer :: fu, ierr
 real(t_realtime) :: t00, t0, t1
 character(len=10*max_char_len) :: message(3)
 type(t_bcs_error) :: b_err

 ! Input namelist
 namelist /input/ &
   ! test case
   in_testname, &
   ! output base name
   in_basename, &
   ! base
   k, method,   &
   ! grid
   write_grid,  &
   grid_file,   &
   ! stabilization
   stab_method, stab_tauk, stab_bartauk, &
   ! boundary conditions
   nbound, cbc_type, &
   ! linear system solver
   linsolver,   &
   ! UMFPACK control variables
   umfpack_print_level, &
   ! iterative solver control variables (used only if linsolver.eq."iterative")
   itsolver,                 &
   itsol_abstol, itsol_toll, &
   !  precon_ndiags can be larger than the size of the matrix, in
   !  which case the whole matrix is used as preconditioner
   preconditioner, precon_ndiags, &
   ! results
   write_sys,   &
   ! postprocessing
   compute_usef,&
   ! errors
   compute_error_norms, err_extra_deg

 !-----------------------------------------------------------------------
 ! Setup of all the MPI processes
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Preliminary initializations
 t00 = my_second()
 call mod_messages_constructor()

 !$ if(detailed_timing_omp) call mod_omp_utils_constructor()

 call mod_kinds_constructor()

 call mpi_init(ierr)
 call mod_mpi_utils_constructor()
 call mpi_comm_rank(mpi_comm_world,mpi_id,ierr)

 call mod_fu_manager_constructor()

 call mod_octave_io_constructor()

 call mod_sympoly_constructor()
 call mod_octave_io_sympoly_constructor()

 call mod_linal_constructor()

 call mod_perms_constructor()
 call mod_octave_io_perms_constructor()

 call mod_sparse_constructor()
 call mod_octave_io_sparse_constructor()
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Read input file
 call new_file_unit(fu,ierr)
 open(fu,file=trim(input_file_name), &
      status='old',action='read',form='formatted',iostat=ierr)
 read(fu,input)
 close(fu,iostat=ierr)
 ! echo the input namelist
 out_file_nml_name = trim(in_basename)//out_file_nml_suff
 open(fu,file=trim(out_file_nml_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
 write(fu,input)
 close(fu,iostat=ierr)

 allocate(bc_type_t(nbound,2))
 read(cbc_type,*) bc_type_t ! transposed, for simplicity
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! We can now initialize the linsolver constructor on ALL the
 ! processes and set the solver parameters (in fact, such parameters
 ! will not change during the entire execution).
 call mod_linsolver_constructor(trim(linsolver))
 linsolver_general_parms%transposed_mat = .false.
 linsolver_umfpack_parms%umfpack_print_level = &
         umfpack_print_level
 linsolver_itersol_parms%itsolver       = itsolver
 linsolver_itersol_parms%itsol_toll     = itsol_toll
 linsolver_itersol_parms%itsol_abstol   = itsol_abstol
 linsolver_itersol_parms%preconditioner = preconditioner
 linsolver_itersol_parms%precon_ndiags  = precon_ndiags
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! End of the setup of all the MPI processes: from now on, only the
 ! master process executes the code, while the remaining processes are
 ! involved only in the solution of the linear system when the solver
 ! is mumps.
 mpi_if_1: if(mpi_id.eq.0) then
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! output control setup
 call mod_output_control_constructor(in_basename)
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Construct the grid
 t0 = my_second()

 call mod_master_el_constructor()

 call mod_grid_constructor()
 call new_grid(grid,trim(grid_file))

 ! write the octave output
 if(write_grid) then
   out_file_grid_name = trim(base_name)//out_file_grid_suff
   call new_file_unit(fu,ierr)
   open(fu,file=trim(out_file_grid_name), &
        status='replace',action='write',form='formatted',iostat=ierr)
    call write_octave(grid,'grid',fu)
   close(fu,iostat=ierr)
 endif

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed grid construction: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Define the finite element space
 t0 = my_second()

 call mod_numquad_constructor()

 call mod_base_constructor()

 call mod_fe_spaces_constructor()
 select case(trim(method))
  case('rt')
   base = rt(grid%me,k)
  case('ldgh')
   base = ldgh(grid%me,k)
  case('bdm')
   base = bdm(grid%me,k)
  case default
   write(message(1),'(A,A,A)') 'Unknown method "',trim(method),'".'
   call error(this_prog_name,'',message(1))
 end select

 ! write the octave output
 out_file_base_name = trim(base_name)//out_file_base_suff
 call new_file_unit(fu,ierr)
 open(fu,file=trim(out_file_base_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
   call write_octave(base,'base',fu)
 close(fu,iostat=ierr)

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed FE basis construction: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Collect the information concerning the boundary conditions
 call mod_bcs_constructor()
 call new_bcs(bcs,grid,transpose(bc_type_t))
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Test case setup
 call mod_testcases_constructor(in_testname,grid%m)
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Define the stabilization parameter tau
 !> \bug check the definition of the \f$\tau\f$ parameter
 call mod_tau_constructor(trim(stab_method),stab_tauk,stab_bartauk, &
                          grid,base,write_grid)
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Assemble matrix and right hand side
 call mod_ldgh_locmat_constructor(base)

 t0 = my_second()
 !$ if(detailed_timing_omp) then
 !$   call omput_push_key("LocalMatrices")
 !$   call omput_start_timer()
 !$ endif

 !$omp parallel private(dp , mk , pk , nk ,    &
 !$omp                  aak , bbk , cck ,      &
 !$omp                  ddk , eek , hhk , f2k, &
 !$omp                  ffk , ggk , llk , f3k, &
 !$omp                 aaki , wwk , qqk ,      &
 !$omp              ddkaaki , vvk , uuk ,      &
 !$omp                 mmmk , fffk,            &
 !$omp                  ie , b_err , pos_shift_ie , &
 !$omp                  isl , pos_shift_isl , igdll , &
 !$omp                  pos_shift_igdll , is, i , il , &
 !$omp                  jsl , pos_shift_jsl , jgdll , &
 !$omp                  js, j , jl , pos,      &
 !$omp                  message)&
 !$omp           shared(base,grid,bcs,sdim,mmmi,mmmj,mmmx,fff, &
 !$omp                  taus)   &
 !$omp          default(none)

 dp = base%me%d+1
 mk = base%mk
 pk = base%pk
 nk = base%nk
 allocate( aak(   mk,mk) , bbk(   mk,pk) , cck(   mk,dp*nk) ,            &
           ddk(   pk,mk) , eek(   pk,pk) , hhk(   pk,dp*nk) , f2k(   pk),&
           ffk(dp*nk,mk) , ggk(dp*nk,pk) , llk(dp*nk,dp*nk) , f3k(dp*nk),&
           aaki(  mk,mk) , wwk(   mk,pk) , qqk(   mk,dp*nk) ,            &
          ddkaaki(pk,mk) , vvk(   pk,pk) , uuk(   pk,dp*nk) ,            &
          mmmk(dp*nk,dp*nk), fffk(dp*nk) )
 !$omp single
  sdim = grid%ns*nk ! problem dimension
  allocate( mmmi(grid%ne*(dp*nk)**2) , mmmj(grid%ne*(dp*nk)**2) )
  allocate( mmmx(grid%ne*(dp*nk)**2) , fff(sdim) )
  fff = 0.0_wp
 !$omp end single

 !$omp do schedule(static)
 elem_loop: do ie=1,grid%ne

   !---------------------------------------------------------------------
   !1) compute local matrices
   call ldgh_locmat( aak , bbk , cck ,      &
                     ddk , eek , hhk , f2k, &
                     ffk , ggk , llk , f3k, &
                     base, grid%e(ie), taus(ie), &
                     bcs%b_e2be(ie), b_err )

   if(b_err%lerr) &
     call error(this_prog_name,'',b_err%message)
   !---------------------------------------------------------------------

   !---------------------------------------------------------------------
   !2) static condensation

   ! aaki = inv(aak)
   call invmat_chol( aak , aaki )
   ! ddkaaki = ddk*aaki
   ddkaaki = matmul(ddk,aaki)

   ! vvk  = inv(eek - ddk*aaki*bbk)
   call invmat_nop( eek - matmul(ddkaaki,bbk) , vvk )
   ! uuk  = vvk*(ddk*aaki*cck - hhk)
   uuk = matmul( vvk , matmul(ddkaaki,cck)-hhk )
    
   ! qqk  = -aaki*(bbk*uuk + cck)
   qqk = -matmul( aaki , matmul(bbk,uuk)+cck )
   ! wwk  = -aaki*bbk*vvk
   wwk = -matmul(aaki,matmul(bbk,vvk))
    
   ! mmmk = ffk*qqk + ggk*uuk + llk
   mmmk = matmul(ffk,qqk) + matmul(ggk,uuk) + llk
   ! fffk = f3k - (ffk*wwk + ggk*vvk)*f2k
   fffk = f3k - matmul( matmul(ffk,wwk)+matmul(ggk,vvk) , f2k)
   !---------------------------------------------------------------------

   !---------------------------------------------------------------------
   !3.1) side oriented summation of the element contributions
   ! The summation is done in two steps, exploting the operations
   ! defined for sparse matrices:
   ! a) the contributions of each element are collected into three
   !  vectors: indices and value, without doing any summation of the
   !  contributions of different elements;
   ! b) the sparse matrix is constructed and reduced: the reduction
   !  operation sums the element contributions.
   ! Notice that step b) has to be done outside the element loop.
   pos_shift_ie = (ie-1)*(dp*nk)**2
   iside_do: do isl=1,dp
     pos_shift_isl = pos_shift_ie + (isl-1)*nk*dp*nk
     igdl_do: do igdll=1,nk
       pos_shift_igdll = pos_shift_isl + (igdll-1)*dp*nk
       ! global side index
       is = grid%e(ie)%is(isl)
       ! matrix index i
       i  =  (is-1)*nk + igdll
       ! local matrix index i
       il = (isl-1)*nk + igdll
       jside_do: do jsl=1,dp
         pos_shift_jsl = pos_shift_igdll + (jsl-1)*nk
         jgdl_do: do jgdll=1,nk
           ! global side index
           js = grid%e(ie)%is(jsl)
           ! matrix index i
           j  =  (js-1)*nk + jgdll
           ! local matrix index i
           jl = (jsl-1)*nk + jgdll

           ! store the values in the unreduced matrix
           pos = pos_shift_jsl + jgdll
           mmmi(pos) = i
           mmmj(pos) = j
           mmmx(pos) = mmmk(il,jl)

         enddo jgdl_do
       enddo jside_do

       ! right hand side
       !$omp atomic
        fff(i) = fff(i) + fffk(il)

     enddo igdl_do
   enddo iside_do
   !---------------------------------------------------------------------

 enddo elem_loop
 !$omp end do nowait

 deallocate( aak , bbk , cck ,      &
             ddk , eek , hhk , f2k, &
             ffk , ggk , llk , f3k, &
            aaki , wwk , qqk ,      &
         ddkaaki , vvk , uuk ,      &
            mmmk , fffk )

 !$omp end parallel
 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed computation of the local matrices: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !$ if(detailed_timing_omp) then
 !$   call omput_write_time()
 !$   call omput_close_timer()
 !$   call omput_pop_key()
 !$ endif

 !-----------------------------------------------------------------------
 !3.2) complete the side oriented summation of the element contributions
 ! notice that indexes in sparse matrices start from 0
 mmm = tri2col(new_tri(sdim,sdim,mmmi-1,mmmj-1,mmmx))
 deallocate( mmmi , mmmj , mmmx )

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed assembling of the linear system: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Partition mmm and the rhs according to the Dirichlet bcs
 t0 = my_second()
 allocate(dofs_dir( base%nk * count(bcs%b_side%bc.eq.b_dir) ))
 allocate(lam2( size(dofs_dir) ))
 allocate(dofs_nat( base%nk*(grid%ns) - size(dofs_dir) ))
 is_dir = 0
 is_int = 0
 do is=1,grid%ni
   is_int = is_int+1
   dofs_nat( (is_int-1)*base%nk+1 : is_int*base%nk ) = &
      (/ (igdll, igdll=(is-1)*base%nk,is*base%nk-1) /)
 enddo
 allocate(norm_e2(base%nk)) ! squared norm of the eta basis functions
 do igdll=1,base%nk
   norm_e2(igdll) = sum(base%wgs * base%e(igdll,:)**2)
 enddo
 do is=grid%ni+1,grid%ns
   if(bcs%b_s2bs(is)%p%bc.eq.b_dir) then ! Dirichlet side
     is_dir = is_dir+1
     dofs_dir( (is_dir-1)*base%nk+1 : is_dir*base%nk ) = &
        (/ (igdll, igdll=(is-1)*base%nk,is*base%nk-1) /)
     do igdll=1,base%nk ! projections (the eta must be orthogonal)
       lam2( (is_dir-1)*base%nk+igdll ) = &
          sum( base%wgs                                          * &
               coeff_dir(affmap(grid%s(is)%e(1)%p,                 &
                                base%xigb(:,:,grid%s(is)%isl(1))), &
                         -grid%s(is)%ie(2))                      * &
               base%e(igdll,:) )/norm_e2(igdll)
     enddo
   else
     is_int = is_int+1
     dofs_nat( (is_int-1)*base%nk+1 : is_int*base%nk ) = &
        (/ (igdll, igdll=(is-1)*base%nk,is*base%nk-1) /)
   endif
 enddo
 deallocate(norm_e2)

 !         row             column
 mmm11 = dofs_nat * (mmm * dofs_nat)
 mmm12 = dofs_nat * (mmm * dofs_dir)
 mmm21 = dofs_dir * (mmm * dofs_nat)
 mmm22 = dofs_dir * (mmm * dofs_dir)

 allocate(fff1(size(dofs_nat)))
 fff1 = fff(dofs_nat+1)

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed partitioning of the linear system: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Solution of the linear system
 t0 = my_second()

 allocate(lam1(mmm11%n),rhs(mmm11%n))
 rhs = fff1 - matmul(mmm12,lam2)

 endif mpi_if_1

 ! When the local solver is mumps, all the processes call the solver
 ! routines, otherwise only the master process calls them.
 mpi_if_2: if( (mpi_id.eq.0).or.(linsolver.eq.'mumps') ) then
   call factor(mmm11)
   call solve (mmm11,lam1,rhs)
   call clean ()
 endif mpi_if_2

 mpi_if_3: if(mpi_id.eq.0) then
 deallocate(rhs)

 ! boundary terms
 allocate(fff2(mmm22%n))
 fff2 = matmul(mmm21,lam1) + matmul(mmm22,lam2)

 ! pack back the values
 allocate(lam(size(lam1)+size(lam2)))
 lam(dofs_nat+1) = lam1
 lam(dofs_dir+1) = lam2
 fff(dofs_dir+1) = fff2

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed solution of the linear system: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Clear the variables used in the solution of the linear system
 nnz_mmm11 = mmm11%nz
 call clear( mmm11 )
 call clear( mmm12 )
 call clear( mmm21 )
 call clear( mmm22 )
 deallocate( fff1 , fff2 )
 deallocate( lam1 , lam2 )
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Postprocessings
 call mod_ldgh_reconstructions_constructor(grid,bcs,base,lam, &
                                           compute_usef)
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Error computation
 t0 = my_second()

 errnorms: if(compute_error_norms) then
   call mod_error_norms_constructor()
   
   call hybrid_err(grid,base,lam,coeff_lam,err_extra_deg, &
                   e_l2s,e_l2k,e_pl2s,e_pl2k)
   call primal_err(grid,base,uuu,coeff_lam,err_extra_deg, &
                   e_u_l2)
   call primal_err(grid,bases,uuus,coeff_lam,err_extra_deg, &
                   e_us_l2)
   if(compute_usef) &
     call primal_err_sef(grid,bases,uuusef,coeff_xiadv,xiadv_el, &
                         coeff_lam,err_extra_deg,e_usef_l2)
   call dual_err(grid,base,qqq,coeff_q,coeff_divq,err_extra_deg, &
                 e_q_l2,e_q_hdiv)
!   if(base%r.gt.0) &
!     call dual_err(base_qsdg,qqqsdg,coeff_q,coeff_divq,err_extra_deg, &
!                   e_qsdg_l2,e_qsdg_hdiv)
   call dual_err(grid,base_qsrt,qqqsrt,coeff_q,coeff_divq,err_extra_deg, &
                 e_qsrt_l2,e_qsrt_hdiv)

   call mod_error_norms_destructor()
 endif errnorms

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed error computations: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Write the results
 call new_file_unit(fu,ierr)
 out_file_results_name = trim(in_basename)//out_file_results_suff
 open(fu,file=trim(out_file_results_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
   ! preamble
   call write_octave(test_name       ,'test_name'       ,fu)
   call write_octave(test_description,'test_description',fu)
   call write_octave(nnz_mmm11       ,'nnz_mmm11'       ,fu)
   ! data
   if(write_sys) then
     call write_octave(dofs_nat,'c','dofs_nat',fu)
     call write_octave(dofs_dir,'c','dofs_dir',fu)
     call write_octave(mmm,'mmm',fu)
     call write_octave(fff,'c','fff',fu)
   endif
   call write_octave(lam,'c','lam',fu)
   call write_octave(uuu,'c','uuu',fu)
   call write_octave(qqq,'c','qqq',fu)
   call write_octave(qhn,'qhn',fu)
   call write_octave(bases,'bases',fu)
   call write_octave(uuus,'c','uuus',fu)
   if(compute_usef) then
     call write_octave(uuusef,'c','uuusef',fu)
     call write_octave(xiadv_el,'c','xiadv_el',fu)
   endif
   !> \bug add reconstructions and error computations
!   if(base%k.gt.0) then
!     call write_octave(base_qsdg,'base_qsdg',fu)
!     call write_octave(qqqsdg,'c','qqqsdg',fu)
!   endif
   call write_octave(base_qsrt,'base_qsrt',fu)
   call write_octave(qqqsrt,'c','qqqsrt',fu)
   if(compute_error_norms) then
     call write_octave(e_l2s ,'e_l2s' ,fu)
     call write_octave(e_l2k ,'e_l2k' ,fu)
     call write_octave(e_pl2s,'e_pl2s',fu)
     call write_octave(e_pl2k,'e_pl2k',fu)
     call write_octave(e_u_l2,'e_u_l2',fu)
     call write_octave(e_us_l2,'e_us_l2',fu)
     if(compute_usef) &
       call write_octave(e_usef_l2,'e_usef_l2',fu)
     call write_octave(e_q_l2,'e_q_l2',fu)
     call write_octave(e_q_hdiv,'e_q_hdiv',fu)
!     if(base%k.gt.0) then
!       call write_octave(e_qsdg_l2,'e_qsdg_l2',fu)
!       call write_octave(e_qsdg_hdiv,'e_qsdg_hdiv',fu)
!     endif
     call write_octave(e_qsrt_l2,'e_qsrt_l2',fu)
     call write_octave(e_qsrt_hdiv,'e_qsrt_hdiv',fu)
   endif
 close(fu,iostat=ierr)
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Cleanup
 call mod_ldgh_reconstructions_destructor()

 deallocate( dofs_nat , dofs_dir , fff , lam )
 call clear( mmm )

 call mod_ldgh_locmat_destructor()

 call mod_tau_destructor()

 call mod_testcases_destructor()

 call clear( bcs )
 call mod_bcs_destructor()

 call clear( base )
 call mod_fe_spaces_destructor()

 call mod_base_destructor()

 call mod_numquad_destructor()

 call clear( grid )
 call mod_grid_destructor()

 call mod_master_el_destructor()

 call mod_output_control_destructor()

 endif mpi_if_3

 !-----------------------------------------------------------------------
 ! Cleanup of all the MPI processes
 !-----------------------------------------------------------------------

 call mod_linsolver_destructor()

 deallocate( bc_type_t )

 call mod_octave_io_sparse_destructor()
 call mod_sparse_destructor()

 call mod_octave_io_perms_destructor()
 call mod_perms_destructor()

 call mod_linal_destructor()

 call mod_octave_io_sympoly_destructor()
 call mod_sympoly_destructor()

 call mod_octave_io_destructor()

 call mod_fu_manager_destructor()

 call mod_mpi_utils_destructor()
 call mpi_finalize(ierr)

 call mod_kinds_destructor()

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Done: elapsed time ',t1-t00,'s.'
 call info(this_prog_name,'',message(1))

 !$ if(detailed_timing_omp) call mod_omp_utils_destructor()

 call mod_messages_destructor()
 !-----------------------------------------------------------------------

end program ldgh_main

